## ---------------------------
##
## Script name: process_city_geographies.R
##
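## Description: Collects the 2010 census tracts that fall within the
##   census-place boundary of each listed city, saves them to a single
##   file, and writes a PDF with one tract/boundary map per city.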
##
## ---------------------------
## DEPENDENCIES

library(stringr)
library(dplyr)
library(tidyr)
library(sf)
library(tigris)
library(ggplot2)

## ---------------------------


# Cities to be included (the list of 100). This is read when the file is
# sourced and relies on a `file_locations` object that already exists in
# the calling environment.
city_list <- read.csv(
  paste0(file_locations$current_fp, "/", file_locations$Helpers$`City List`)
)

#' Collect the 2010 census tracts within each listed city's boundary.
#'
#' @description For every row of the global `city_list` data frame, loads
#' the 2010 census tracts that lie within the city's census-place boundary,
#' draws the tracts and the place outline on one page of a multi-page PDF,
#' and finally writes the tracts of all cities to a single file.
#'
#' @param file_locations list. File locations loaded from file_locations.R;
#'   `current_fp` and `Nation$Geographies` are joined with "/" to build the
#'   output path.
#' @param overwrite boolean. Determines if an already processed file should
#'   be overwritten.
#'
#' @return Called for its side effects (the saved tract file and the PDF of
#'   city maps). Returns NULL invisibly when the output file already exists
#'   and `overwrite` is FALSE; otherwise returns the value of `write_sf()`.
process_city_geographies <- function(file_locations, overwrite) {

  output_fp <- paste(file_locations$current_fp, file_locations$Nation$Geographies, sep = "/")

  # Nothing to do when the output exists and we were not asked to redo it.
  if (!overwrite && file.exists(output_fp)) {
    return(invisible(NULL))
  }

  # Set up the pdf file for maps; every plot() below adds one page.
  pdf("./figures/Nation/all_city_2010_tract_maps.pdf", onefile = TRUE)
  pdf_device <- dev.cur()
  # Close the device even if a city fails part-way through, so that the
  # graphics device is not left open.
  on.exit(dev.off(pdf_device), add = TRUE)

  n_cities <- nrow(city_list)
  all_city_maps <- vector("list", n_cities)

  for (i in seq_len(n_cities)) {

    city <- city_list$city[[i]]
    state_abr <- city_list$state_abr[[i]]
    # place FIPS codes are 7 characters; restore leading zeros
    stpl_fips <- str_pad(city_list$stpl_fips[[i]], 7, side = "left", pad = "0")
    print(paste0(city, ": ", i))

    # load tract geographies within city boundary
    city_map <- .load_city_geography(
      state_abr, stpl_fips,
      geography_level = "tract", year = 2010, filter_place = TRUE
    )

    # Chicago's tracts already come back with a `tract_2010` column; for all
    # other cities it is built from the STATE, COUNTY and TRACT columns.
    if (city != "Chicago") {
      city_map <- city_map %>%
        unite("tract_2010", STATE, COUNTY, TRACT, sep = "", remove = FALSE)
    }

    # load city boundary
    place_geo <- .load_city_geography(
      state_abr, stpl_fips,
      geography_level = "place", year = 2011, filter_place = FALSE
    )

    # plot tract & city geographies
    p <- ggplot(city_map) +
      geom_sf(fill = "lightblue") +
      geom_sf(data = place_geo, fill = NA, color = "red", size = 0.5, linetype = "dashed") +
      ggtitle(paste(city)) +
      theme(plot.title = element_text(hjust = 0.5, size = 20))

    plot(p)

    # keep only the tract id (plus geometry) and tag each tract with its city
    all_city_maps[[i]] <- city_map %>%
      dplyr::select(tract_2010) %>%
      mutate(city = city)
  }

  all_city_maps <- bind_rows(all_city_maps)

  # save all tracts within the city boundaries to a single file
  write_sf(all_city_maps, output_fp)
}


#'Return the geography for a city's census
#'place or tracts within a city's census place.
#'
#'@param state_abr character. State abbreviation of city.
#'@param stpl_fips character. FIPS code for city's census place.
#'@param geography_level character. "place" or "tract",
#'the geographic level of the data to return. Only consulted when
#'`year >= 2011`; for earlier years tracts are always returned.
#'@param year integer. year of data from which tracts or places
#' should be loaded. (place data is not available before 2011, but
#' 2010 tract data is used for any year before 2020, so if needing
#' to get tracts within the census place boundary, it is okay
#' to use 2011.)
#'@param filter_place boolean. If TRUE, only tracts with at least 75% of
#' their area inside the census place boundary are returned; if FALSE,
#' all tracts of the state are returned.
#'
#'@return sf dataframe
#'@export
.load_city_geography <- function(state_abr, stpl_fips, geography_level="place", year=2011, filter_place=FALSE) {

  # place data is only available for years 2011 and after
  if (year >= 2011) {

    # Fail loudly on an unknown level instead of silently returning NULL.
    if (!isTRUE(geography_level %in% c("place", "tract"))) {
      stop(
        "geography_level must be \"place\" or \"tract\", not \"",
        toString(geography_level), "\".",
        call. = FALSE
      )
    }

    if (geography_level == "place") {
      return(.load_place_boundary(state_abr, stpl_fips, year))
    }

    tracts_df <- suppressMessages(
      tracts(state = state_abr, year = year) %>% st_transform(crs = 4326)
    )

    # The place boundary is only fetched when it is actually needed.
    if (filter_place) {
      place_geo <- .load_place_boundary(state_abr, stpl_fips, year)
      tracts_df <- .filter_tracts_to_place(tracts_df, place_geo)
    }

    return(tracts_df)
  }

  # Special condition for Chicago in 2010; we will get the
  # exact census tracts from a file provided publicly by the city.
  if (year == 2010 && stpl_fips == "1714000" && filter_place) {

    warning("Returning filtered 2010 tracts.")

    tracts_df <- read_sf("./data/Chicago/geography/geo_export_5edfc102-a1dd-414c-834f-1e8c2fdcc056.shp") %>%
      st_transform(crs='WGS84') %>%
      dplyr::select(tract_2010 = geoid10)

    return(tracts_df)
  }

  # when we need non-Chicago census tracts for a year <= 2010;
  # we will just need to use 2011 place boundaries for
  # filtering census tracts
  tracts_df <- suppressMessages(
    tracts(state = state_abr, year = year, cb = TRUE) %>% st_transform(crs = 4326)
  )

  if (filter_place) {
    place_geo <- .load_place_boundary(state_abr, stpl_fips, 2011)
    tracts_df <- .filter_tracts_to_place(tracts_df, place_geo)
  }

  tracts_df
}


# Load the boundary of a single census place (matched on GEOID) for the
# given state and year, transformed to CRS 4326.
.load_place_boundary <- function(state_abr, stpl_fips, year) {
  places(state_abr, year = year) %>%
    filter(GEOID == stpl_fips) %>%
    st_transform(crs = 4326)
}


# Keep the tracts that have at least `min_fraction` of their own area inside
# the place boundary. Adds the helper columns `intersection_area`,
# `tract_area` and `fraction_intersecting` to the returned tracts.
.filter_tracts_to_place <- function(tracts_df, place_geo, min_fraction = 0.75) {
  suppressMessages({

    # cheap pre-filter: tracts that touch the place at all
    candidates <- tracts_df[st_intersects(place_geo, tracts_df, sparse = FALSE), ]

    candidates$intersection_area <- as.numeric(st_area(st_intersection(place_geo, candidates)))
    candidates$tract_area <- as.numeric(st_area(candidates))
    candidates$fraction_intersecting <- candidates$intersection_area / candidates$tract_area

    candidates %>% filter(fraction_intersecting >= min_fraction)
  })
}







